Data overview:
# define variables to include (will remove CYAN and PEYS from some analyses)
benthic_var <- c("lc", "cca", "ta", "tas", "fma", "cma", "cyan", "ainv", "peys")
benthic_cover_trans <-
# historical data
read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "BenthicPointCoverScaled", "BenthicPointCoverScaledByTransect.xlsx"), sheet = "Data") %>%
clean_names() %>%
mutate(year = year(date),
trans = as.character(trans), # to bind with 2025 NPA data with initials
t_tas = tas + tam) %>%
select(site, year, trans, benthic_var) %>%
mutate(across(benthic_var, ~ . / 100)) %>% # convert to proportion format
# bind 2025 data
bind_rows(
bind_rows(
# NEMMA
read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
clean_names() %>%
mutate(`transect_name` = as.character(`transect_name`)) %>% # to bind with 2025 NPA data with initials
# incorporate TA sheet
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
clean_names() %>%
select(transect_id, ta, sta, tas, tam)),
# NDNP
read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
clean_names() %>%
# incorporate TA sheet
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
clean_names() %>%
select(transect_id, ta, sta, tas, tam))) %>%
mutate(year = 2025,
survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name),
tas_sum = tas + sta + tam) %>%
select(site = survey_name, year, trans = transect_name, lc = t_coral, cca = t_cca, ta, tas = tas_sum, fma = t_fma, cma = t_cma, cyan = t_cyan, ainv = t_ainv, peys = t_pey)) %>%
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA sites that have site numbers with different name variations afterwards
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
filter(year != 2022) %>% # 2022 was incomplete data collection year for NPA
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
ungroup()
benthic_cover_site <- benthic_cover_trans %>%
group_by(site, site_cat, year) %>%
rename_with(~ str_remove(.x, "t_")) %>%
summarise(across(c(benthic_var),
list(mean = ~ mean(.x, na.rm = TRUE),
sd = ~ sd(.x)),
.names = "{.col}_{.fn}")) %>%
ungroup()
benthic_cover_year <- benthic_cover_site %>%
select(!contains("_sd")) %>%
rename_with(~ str_remove(.x, "_mean")) %>%
group_by(year, site_cat) %>%
summarise(across(c(benthic_var),
list(mean = ~ mean(.x, na.rm = TRUE),
sd = ~ sd(.x)),
.names = "{.col}_{.fn}")) %>%
ungroup()
write.csv(benthic_cover_year, here("agrra_monitoring", "data_outputs", "benthic_cover_year.csv"))| year | site_cat | lc_mean | lc_sd | cca_mean | cca_sd | ta_mean | ta_sd | tas_mean | tas_sd | fma_mean | fma_sd | cma_mean | cma_sd | cyan_mean | cyan_sd | ainv_mean | ainv_sd | peys_mean | peys_sd |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2017 | NEMMA | 0.1200074 | 0.0500671 | 0.1000121 | 0.0563351 | 0.1372388 | 0.0637481 | 0.1908383 | 0.1405502 | 0.2124538 | 0.0868144 | 0.0669921 | 0.0568517 | 0.0376893 | 0.0553593 | 0.0431588 | 0.0137899 | 0.0536530 | 0.0266282 |
| 2019 | NDNP | 0.0563083 | 0.0163313 | 0.0317857 | 0.0217454 | 0.0915333 | 0.1307974 | 0.5200929 | 0.1379033 | 0.1445060 | 0.0624581 | 0.0327048 | 0.0334719 | 0.0148095 | 0.0151115 | 0.0276940 | 0.0163452 | 0.0076321 | 0.0043003 |
| 2025 | NDNP | 0.0268379 | 0.0110233 | 0.0408207 | 0.0463093 | 0.0041743 | 0.0068380 | 0.4374636 | 0.1171587 | 0.3195486 | 0.0726502 | 0.0148298 | 0.0190004 | 0.0143074 | 0.0236870 | 0.0177629 | 0.0094187 | 0.0355738 | 0.0224526 |
| 2025 | NEMMA | 0.0422875 | 0.0308934 | 0.1540312 | 0.0875113 | 0.0435583 | 0.0454585 | 0.1662500 | 0.1147224 | 0.4294562 | 0.1503852 | 0.0386479 | 0.0361970 | 0.0168750 | 0.0210095 | 0.0237000 | 0.0283759 | 0.0551625 | 0.0429563 |
benthic_2025 <- benthic_cover_year %>%
filter(year == 2025) %>%
select(
site_cat,
lc_mean, lc_sd,
cca_mean, cca_sd,
ta_mean, ta_sd,
tas_mean, tas_sd,
fma_mean, fma_sd,
cma_mean, cma_sd,
ainv_mean, ainv_sd
) %>%
pivot_longer(
cols = -site_cat, # exclude site_cat
names_to = c("metric", "stat"),
names_sep = "_",
values_to = "value"
) %>%
pivot_wider(
names_from = stat,
values_from = value
) %>%
mutate(
metric = recode(
metric,
lc = "Live coral",
cca = "CCA",
ta = "Turf algae",
tas = "Turf + sediment",
fma = "Fleshy macroalgae",
cma = "Crustose macroalgae",
ainv = "Other inverts"
),
metric = factor(metric, levels = c(
"Live coral",
"CCA",
"Crustose macroalgae",
"Turf algae",
"Turf + sediment",
"Fleshy macroalgae",
"Other inverts"
))
)
ggplot(benthic_2025, aes(x = metric, y = mean, fill = metric)) +
geom_col(
position = position_dodge(width = 0.8),
width = 0.7,
color = "black",
fill = "gray"
) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd),
width = 0.2,
position = position_dodge(width = 0.8)
) +
facet_wrap(~ site_cat) +
scale_y_continuous(
labels = scales::percent_format(accuracy = 1),
expand = expansion(mult = c(0, 0.05))
) +
labs(
x = NULL,
y = "Mean benthic cover (2025)"
) +
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text = element_text(face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)make_lp <- function(df, yvar, ylab) {
p <- ggplot(df, aes(x = year, y = {{ yvar }}, group = site, shape = site)) +
geom_point() +
scale_shape_manual(values = 0:35) +
geom_line() +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year", y = ylab, color = "Site", group = "Site") +
theme_minimal()
p
}
make_lp(df = benthic_cover_site, lc_mean, "Live coral cover")
make_lp(df = benthic_cover_site, fma_mean, "FMA cover")
make_lp(df = benthic_cover_site, ta_mean, "TA cover")
make_lp(df = benthic_cover_site, tas_mean, "TA-sediment cover")# preparing data - generating deltas for each metric/site_cat
benthic_cover_site_chg <- benthic_cover_site %>%
group_by(site_cat) %>%
mutate(
timepoint = if_else(year == min(year), "baseline", "2025")
) %>%
ungroup() %>%
pivot_wider(
id_cols = c(site, site_cat),
names_from = timepoint,
values_from = c(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean, cma_mean, ainv_mean),
names_sep = "_"
) %>%
mutate(
d_lc = lc_mean_2025 - lc_mean_baseline,
d_cca = cca_mean_2025 - cca_mean_baseline,
d_ta = ta_mean_2025 - ta_mean_baseline,
d_tas = tas_mean_2025 - tas_mean_baseline,
d_fma = fma_mean_2025 - fma_mean_baseline,
d_cma = cma_mean_2025 - cma_mean_baseline,
d_ainv = ainv_mean_2025 - ainv_mean_baseline
) %>%
pivot_longer(
cols = starts_with("d_"),
names_to = "metric",
values_to = "delta"
) %>%
mutate(
metric = recode(metric,
d_lc = "Live coral",
d_cca = "CCA",
d_ta = "Turf algae",
d_tas = "Turf + sediment",
d_fma = "Fleshy macroalgae",
d_cma = "Calc. macroalgae",
d_ainv = "Aggressive inverts"
)
) %>%
select(site, site_cat, metric, delta) %>%
mutate(delta_pct = delta * 100)
# generating stats table
# Function to compute bootstrapped mean CI
boot_mean_ci <- function(x, n_boot = 1000, conf = 0.95) {
# Boot function
boot_fn <- function(data, indices) mean(data[indices])
b <- boot::boot(x, statistic = boot_fn, R = n_boot)
ci <- boot::boot.ci(b, type = "perc")$percent[4:5] # lower, upper
return(ci)
}
delta_table <- benthic_cover_site_chg %>%
group_by(site_cat, metric) %>%
summarise(
mean_delta = mean(delta_pct, na.rm = TRUE),
ci = list(boot_mean_ci(delta_pct)),
wilcox_p = wilcox.test(delta_pct, mu = 0)$p.value,
.groups = "drop"
) %>%
mutate(
ci_lower = sapply(ci, `[[`, 1),
ci_upper = sapply(ci, `[[`, 2)
) %>%
select(-ci) %>%
mutate(
mean_delta = round(mean_delta, 2),
ci_lower = round(ci_lower, 2),
ci_upper = round(ci_upper, 2),
wilcox_p = signif(wilcox_p, 3)
) %>%
arrange(metric, site_cat)
kable(delta_table)| site_cat | metric | mean_delta | wilcox_p | ci_lower | ci_upper |
|---|---|---|---|---|---|
| NDNP | Aggressive inverts | -0.99 | 0.153000 | -2.13 | 0.07 |
| NEMMA | Aggressive inverts | -1.95 | 0.148000 | -3.50 | -0.01 |
| NDNP | CCA | 0.90 | 0.463000 | -0.63 | 2.76 |
| NEMMA | CCA | 5.40 | 0.313000 | -1.86 | 12.43 |
| NDNP | Calc. macroalgae | -1.79 | 0.003050 | -3.04 | -0.69 |
| NEMMA | Calc. macroalgae | -2.83 | 0.109000 | -5.63 | 0.19 |
| NDNP | Fleshy macroalgae | 17.50 | 0.000122 | 12.55 | 22.51 |
| NEMMA | Fleshy macroalgae | 21.70 | 0.007810 | 13.11 | 29.77 |
| NDNP | Live coral | -2.95 | 0.002320 | -4.16 | -1.66 |
| NEMMA | Live coral | -7.77 | 0.015600 | -11.24 | -3.44 |
| NDNP | Turf + sediment | -8.26 | 0.041900 | -15.61 | -1.16 |
| NEMMA | Turf + sediment | -2.46 | 1.000000 | -11.91 | 5.28 |
| NDNP | Turf algae | -8.74 | 0.000610 | -16.69 | -3.87 |
| NEMMA | Turf algae | -9.37 | 0.023400 | -14.76 | -2.95 |
# plotting - all metrics
ggplot(delta_table,
aes(x = mean_delta, y = metric)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_errorbarh(
aes(xmin = ci_lower, xmax = ci_upper),
height = 0.2
) +
geom_point(size = 3) +
facet_wrap(~ site_cat, scales = "free_y") +
labs(
x = "Mean change (Δ)",
y = NULL
) +
theme_bw() +
theme(
strip.background = element_blank(),
strip.text = element_text(face = "bold"),
panel.grid.major.y = element_blank()
)# plotting select metrics
pd <- position_dodge(width = 0.6)
ggplot(delta_table,
aes(x = mean_delta, y = metric, color = site_cat)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_errorbarh(
aes(xmin = ci_lower, xmax = ci_upper),
height = 0.2,
position = pd
) +
geom_point(
size = 3,
position = pd
) +
labs(
x = "Mean change (Δ) in percent cover",
y = NULL,
color = "Location"
) +
theme_bw() +
theme(
strip.background = element_blank(),
strip.text = element_text(face = "bold"),
panel.grid.major.y = element_blank()
)# plotting - indv. metrics
plot_delta <- function(df, metric_name, y_label = NULL) {
df_metric <- df %>% filter(metric == metric_name)
if (is.null(y_label)) {
y_label <- paste0("Change in percent ", metric_name, " cover")
}
# Compute symmetric limits around zero
max_abs <- max(abs(df_metric$delta_pct), na.rm = TRUE)
y_limits <- c(-max_abs, max_abs)
ggplot(df_metric, aes(x = site_cat, y = delta_pct)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
geom_quasirandom(alpha = 0.5, size = 2, width = 0.2) +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.8) +
stat_summary(fun = mean, geom = "point", size = 3, color = "black") +
theme_classic() +
coord_flip() +
labs(x = "", y = y_label) +
ylim(y_limits) # <- this line sets symmetric y-axis
}
p_fma <- plot_delta(benthic_cover_site_chg, "Fleshy macroalgae")
p_lc <- plot_delta(benthic_cover_site_chg, "Live coral")
p_delta_indv <- p_fma / p_lc # patchwork uses / for vertical layout
ggsave(here("agrra_monitoring", "figs", "benthic_delta_indv.png"), plot = p_delta_indv, width = 6, height = 8)
print(p_delta_indv)make_bp <- function(df, site_name, yvar, ylab,
y_max = NULL, test_method = "wilcox.test",
show_ns = FALSE, remove_x_text = FALSE,
title = TRUE, remove_y_text = FALSE) {
df_sub <- df %>%
filter(site_cat == site_name) %>%
mutate(year = factor(as.character(year), levels = c("2017", "2019", "2025")))
yrs <- sort(unique(as.character(df_sub$year)))
comps <- if (length(yrs) >= 2) combn(yrs, 2, simplify = FALSE) else list()
yvar_sym <- rlang::sym(yvar)
p <- ggplot(df_sub, aes(x = year, y = !!yvar_sym)) +
geom_boxplot(outlier.size = 1, width = 0.6) +
geom_jitter(width = 0.15, size = 1, alpha = 0.6) +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = if (!remove_y_text) ylab else NULL,
title = if (title) site_name else NULL) +
theme_pub() +
theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
if (remove_y_text) {
p <- p + theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank()
)
}
if (remove_x_text) {
p <- p + theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()
)
}
if (!is.null(y_max)) {
p <- p + coord_cartesian(ylim = c(0, y_max))
}
if (length(comps) > 0) {
p <- p + stat_compare_means(
method = test_method,
comparisons = comps,
label = "p.signif",
hide.ns = !show_ns,
vjust = 0)
}
p
}
p1 <- make_bp(benthic_cover_site, "NEMMA",
yvar = "lc_mean", ylab = "Live coral",
y_max = 0.30,
remove_x_text = TRUE,
title = TRUE,
remove_y_text = FALSE)
p2 <- make_bp(benthic_cover_site, "NDNP",
yvar = "lc_mean",
y_max = 0.30,
remove_x_text = TRUE,
title = TRUE,
remove_y_text = TRUE)
p3 <- make_bp(benthic_cover_site, "NEMMA",
yvar = "fma_mean", ylab = "Fleshy macroalgae",
y_max = 0.80,
title = FALSE,
remove_y_text = FALSE)
p4 <- make_bp(benthic_cover_site, "NDNP",
yvar = "fma_mean",
y_max = 0.80,
title = FALSE,
remove_y_text = TRUE)
final_plot <- (p1 | p2) / (p3 | p4)
final_plot
ggsave(here("agrra_monitoring", "figs", "lc_fma_cover.png"), plot = final_plot, width = 8, height = 6)make_rc <- function(df, site_cat, title = NULL,
line_colors = c("coral", "skyblue4")) {
# Filter the site
df_site <- df %>%
filter(site_cat == site_cat)
# Extract mean columns only
mean_cols <- df_site %>% select(contains("_mean"))
# Compute max PC across all mean vars
max_pc <- mean_cols %>% unlist() %>% max(na.rm = TRUE)
# Count mean columns
ncol_pc <- ncol(mean_cols)
# Make radar data
benthic_rdr <- rbind(
rep(max_pc, ncol_pc), # max row
rep(0, ncol_pc), # min row
mean_cols %>% rename_with(~ gsub("_mean", "", .x))
)
# ---- FIX: convert years to character before make.unique() ----
rownames(benthic_rdr) <- c(
"max",
"min",
make.unique(as.character(df_site$year), sep = "_")
)
# Colors
fill_colors <- alpha(line_colors, 0.5)
# ---- Plot ----
radarchart(
benthic_rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey",
title = ifelse(is.null(title), site_cat, title)
)
# ---- Legend ----
legend(
"topright",
legend = rownames(benthic_rdr)[3:nrow(benthic_rdr)],
col = line_colors,
fill = fill_colors,
lty = 1,
lwd = 2,
bty = "n",
cex = 0.8
)
}
make_rc(benthic_cover_year, site_cat = "NEMMA")max_pc <- benthic_cover_year %>%
filter(site_cat == "NEMMA") %>%
select(contains("_mean")) %>%
unlist() %>%
max(na.rm = TRUE)
ncol_pc <- ncol(benthic_cover_year %>%
filter(site_cat == "NEMMA") %>%
select(contains("_mean")))
benthic_rdr <-
rbind(rep(max_pc, ncol_pc),
rep(0, ncol_pc),
benthic_cover_year %>%
filter(site_cat == "NEMMA") %>%
column_to_rownames(var = "year") %>%
select(contains("_mean")) %>%
rename_with(~ gsub("_mean", "", .x))
)
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)
radarchart(benthic_rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey")
legend(
x = "topright",
legend = rownames(benthic_rdr)[3:4], # skip max/min rows
col = line_colors,
lty = 1,
lwd = 2,
bty = "n",
cex = 0.8,
pt.cex = 1.5,
fill = fill_colors
)max_pc <- benthic_cover_year %>%
filter(site_cat == "NDNP") %>%
select(contains("_mean")) %>%
unlist() %>%
max(na.rm = TRUE)
ncol_pc <- ncol(benthic_cover_year %>%
filter(site_cat == "NDNP") %>%
select(contains("_mean")))
benthic_rdr <-
rbind(rep(max_pc, ncol_pc),
rep(0, ncol_pc),
benthic_cover_year %>%
filter(site_cat == "NDNP") %>%
column_to_rownames(var = "year") %>%
select(contains("_mean")) %>%
rename_with(~ gsub("_mean", "", .x))
)
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)
radarchart(benthic_rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey")
# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
x = "topright",
legend = rownames(benthic_rdr)[3:4], # skip max/min rows
col = line_colors,
lty = 1,
lwd = 2,
bty = "n", # no box
cex = 0.8,
pt.cex = 1.5,
fill = fill_colors # show fill colors in legend
)Removing cyanobacteria here due to seasonality and peysonnelids in NDNP due to lack in confidence about identification
nmds_var <- c("lc", "cca", "ta", "tas", "fma", "cma", "ainv")
benthic_nmds <- benthic_cover_site %>%
rename_with(~ gsub("_mean", "", .x)) %>%
mutate(period = ifelse(year == "2025", "2025", "2017-2019")) %>%
select(site, site_cat, period, nmds_var) %>%
ungroup()
benthic_meta <- benthic_nmds %>%
ungroup() %>%
select(site, site_cat, period)
benthic_pc <- benthic_nmds %>%
ungroup() %>%
select(-site, -site_cat, -period)
set.seed(123)
benthic_nmds_ord <- metaMDS(
benthic_pc,
distance = "bray",
k = 2,
trymax = 100,
trace = 0
)
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
as.data.frame() %>%
mutate(site = benthic_meta$site,
site_cat = benthic_meta$site_cat,
period = benthic_meta$period)
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
as.data.frame() %>%
rownames_to_column(var = "category")
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
mutate(
NMDS1 = NMDS1 * arrow_mult,
NMDS2 = NMDS2 * arrow_mult,
label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
)
p <- ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = period)) +
geom_point(size = 1, alpha = 0.4) +
stat_ellipse(aes(group = period), type = "t", linetype = 2) +
facet_wrap(~ site_cat) +
# excluding arrows in publication version
# geom_segment(data = pc_scores_scaled,
# aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
# arrow = arrow(length = unit(0.3, "cm")),
# color = "black",
# inherit.aes = FALSE) +
# geom_text(data = pc_scores_scaled,
# aes(x = NMDS1 + label_nudge_x,
# y = NMDS2 + label_nudge_y,
# label = category),
# color = "black", size = 3, inherit.aes = FALSE) +
theme_pub() +
labs(color = "Year") +
theme(strip.text = element_text(size = 14, face = "bold"),
legend.position = "bottom")
p# PERMANOVA
### need to fix code so it distinguishes between year and site_cat in the model
set.seed(123)
adonis_res <- adonis2(
as.matrix(benthic_nmds[, nmds_var]) ~ site_cat + period,
data = benthic_nmds,
permutations = 999,
method = "bray"
)
permanova_table <- adonis_res %>%
as.data.frame() %>%
rownames_to_column("Factor") %>%
filter(Factor != "Residual") %>% # usually not shown
select(Factor, Df, R2, F, `Pr(>F)`) %>%
rename(p = `Pr(>F)`) %>%
mutate(across(where(is.numeric), ~ round(.x, 3)))
kable(permanova_table)| Factor | Df | R2 | F | p |
|---|---|---|---|---|
| Model | 2 | 0.539 | 23.985 | 0.001 |
| Total | 43 | 1.000 | NA | NA |
ft <- flextable(permanova_table) |>
autofit()
read_docx() |>
body_add_flextable(ft) |>
print(target = here("agrra_monitoring", "figs", "benthic_nmds_permanova.docx"))# envfit
full_names <- c(
lc = "Live coral",
ta = "Turf algae",
tas = "Turf algae – sediment",
fma = "Fleshy macroalgae",
cma = "Calcareous macroalgae",
ainv = "Aggressive invertebrates",
cca = "Crustose coralline algae"
)
env_table <- envfit(benthic_nmds_ord, benthic_pc, permutations = 999) %>%
(\(.) {
arrows <- as.data.frame(.$vectors$arrows) %>%
rownames_to_column("code")
stats <- tibble(
code = rownames(.$vectors$arrows),
r2 = .$vectors$r,
p = .$vectors$pvals
)
arrows %>%
left_join(stats, by = "code") %>%
mutate(
category = full_names[code]
) %>%
select(category, code, NMDS1, NMDS2, r2, p)
})() %>%
filter(p < 0.05) %>% # keep only significant variables
arrange(p, desc(r2)) %>% # order by sig, then by r2
mutate(across(
where(is.numeric),
~ round(.x, 3)
))
# export table
library(gt)
gt_table <- env_table %>%
select(-code) %>% # remove code column
gt() %>%
fmt_number(
columns = where(is.numeric),
decimals = 3
) %>%
cols_align(
align = "center",
columns = everything()
) %>%
cols_label(
category = "Category",
NMDS1 = "NMDS1",
NMDS2 = "NMDS2",
r2 = "R²",
p = "p-value"
)
kable(env_table)| category | code | NMDS1 | NMDS2 | r2 | p |
|---|---|---|---|---|---|
| Turf algae – sediment | tas | -0.997 | 0.080 | 0.979 | 0.001 |
| Fleshy macroalgae | fma | 0.497 | 0.868 | 0.947 | 0.001 |
| Turf algae | ta | 0.326 | -0.945 | 0.701 | 0.001 |
| Crustose coralline algae | cca | 0.941 | -0.338 | 0.455 | 0.001 |
| Live coral | lc | 0.385 | -0.923 | 0.431 | 0.001 |
#### NEMMA only
benthic_nmds <- benthic_cover_site %>%
filter(site_cat == "NEMMA") %>%
select(-site_cat,
-contains("_sd"),
-contains("cyan"),
-contains("peys")) %>%
rename_with(~ gsub("_mean", "", .x)) %>%
mutate(year = as.character(year))
# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc <- benthic_nmds %>% ungroup %>% select(-site, -year)
# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)
# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
as.data.frame() %>%
mutate(site = benthic_meta$site,
year = benthic_meta$year)
# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
as.data.frame() %>%
rownames_to_column(var = "category")
# Plot NMDS
## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
mutate(NMDS1 = NMDS1 * arrow_mult,
NMDS2 = NMDS2 * arrow_mult)
## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
mutate(
label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
)
## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
geom_point(size = 4) +
scale_shape_manual(values = 0:25) +
stat_ellipse(aes(group = year), type = "t", linetype = 2) +
geom_segment(data = pc_scores_scaled,
aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
arrow = arrow(length = unit(0.3, "cm")),
color = "black",
inherit.aes = FALSE) +
geom_text(data = pc_scores_scaled,
aes(x = NMDS1 + label_nudge_x,
y = NMDS2 + label_nudge_y,
label = category),
color = "black",
inherit.aes = FALSE) +
theme_minimal()
# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year,
data = benthic_meta,
method = "bray",
permutations = 999)
print(adonis_res)#### NDNP only
benthic_nmds <- benthic_cover_site %>%
filter(site_cat == "NDNP") %>%
select(-site_cat,
-contains("_sd"),
-contains("cyan"),
-contains("peys")) %>%
rename_with(~ gsub("_mean", "", .x)) %>%
mutate(year = as.character(year))
# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc <- benthic_nmds %>% ungroup %>% select(-site, -year)
# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)
# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
as.data.frame() %>%
mutate(site = benthic_meta$site,
year = benthic_meta$year)
# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
as.data.frame() %>%
rownames_to_column(var = "category")
# Plot NMDS
## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
mutate(NMDS1 = NMDS1 * arrow_mult,
NMDS2 = NMDS2 * arrow_mult)
## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
mutate(
label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
)
## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
geom_point(size = 4) +
stat_ellipse(aes(group = year), type = "t", linetype = 2) +
geom_segment(data = pc_scores_scaled,
aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
arrow = arrow(length = unit(0.3, "cm")),
color = "black",
inherit.aes = FALSE) +
geom_text(data = pc_scores_scaled,
aes(x = NMDS1 + label_nudge_x,
y = NMDS2 + label_nudge_y,
label = category),
color = "black",
inherit.aes = FALSE) +
theme_minimal()
# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year,
data = benthic_meta,
method = "bray",
permutations = 999)
print(adonis_res)Site 12 is skewing this quite a bit - but don’t see reason to omit?
# metadata for genera - family matching
fma_key <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaled.xlsx"), sheet = "Metadata") %>%
clean_names() %>%
fill(scaled_benthic_cover, column_header) %>%
filter(str_starts(scaled_benthic_cover, "tFMA")) %>%
select(code = column_header, genera = definition) %>%
mutate(genera = if_else(code == "BFMA", "Other BFMA",
if_else(code == "GFMA", "Other GFMA",
if_else(code == "RFMA", "Other RFMA",
if_else(code == "FMA", "Other FMA", genera)))),
code = tolower(code))
fma_gen_site <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaled.xlsx"), sheet = "tFMA") %>%
mutate(site_cat = "NEMMA") %>%
bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaled.xlsx"), sheet = "tFMA") %>%
mutate(site_cat = "NDNP")) %>%
rename_with(fix_names) %>%
mutate(year = 2025,
site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
select(year, site_cat, site, contains("avg"), contains("std"), fma_tot_avg = t_fma_avg, fma_tot_std = t_fma_std) %>% # otherwise tFMA and (other) FMA columns get duplicated as FMA
rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
pivot_longer(
cols = -c(site_cat, site, year),
names_to = "family_stat",
values_to = "value"
) %>%
separate(family_stat, into = c("code", "stat"), sep = "_(?=[^_]+$)") %>%
left_join(fma_key, by = "code") %>%
select(year, site_cat, site, code, genera, stat, value) %>%
filter(!is.na(genera))
fma_gen_year <- fma_gen_site %>%
filter(stat == "avg") %>%
select(-stat, avg = value) %>%
group_by(year, site_cat, genera) %>%
summarize(mean = mean(avg),
sd = sd(avg))ggplot(fma_gen_year %>%
filter(year == 2025) %>%
group_by(genera) %>%
filter(sum(mean) > 0),
aes(x = genera, y = mean)) +
geom_col() +
facet_wrap(. ~ site_cat) +
geom_errorbar(
aes(ymin = mean, ymax = mean + sd),
width = 0.2 # width of the error bars
) +
labs(x = NULL, y = "Mean Percent Cover (2025)") +
coord_flip() +
theme_bw()algal_height_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "AlgaePointCoverScaled", "AlgaePointCoverScaledBySite.xlsx"), sheet = "Data") %>%
rename_with(fix_names) %>%
mutate(year = year(date)) %>%
select(year, site, fma_height = fh_avg)
algal_height_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicAlgalHeight.xlsx"), sheet = "Scaled") %>%
bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicAlgalHeight.xlsx"), sheet = "Scaled")) %>%
rename_with(fix_names) %>%
mutate(year = 2025,
site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
select(year, site, fma_height = 't_fma-h')
algal_height_site <- bind_rows(algal_height_hist, algal_height_2025) %>%
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA sites that have site numbers with different name variations afterwards
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
filter(year != 2022 & site_cat %in% c("NEMMA", "NDNP"))
algal_height_year <- algal_height_site %>%
group_by(year, site_cat) %>%
summarize(mean = mean(fma_height),
sd = sd(fma_height))ggplot(algal_height_year, aes(x = factor(year), y = mean)) +
geom_col() +
facet_wrap(. ~ site_cat) +
geom_errorbar(
aes(ymin = mean - sd, ymax = mean + sd),
width = 0.2 # width of the error bars
) +
labs(x = NULL, y = "Mean FMA canopy height (cm)") +
theme_bw()fma_delta <- algal_height_site %>%
group_by(site, site_cat) %>%
summarise(
delta = fma_height[year == max(year)] -
fma_height[year == min(year)],
.groups = "drop"
) %>%
group_by(site_cat) %>%
summarise(
mean_delta = mean(delta, na.rm = TRUE),
ci_lower = mean_delta - qt(0.975, df = n() - 1) * sd(delta, na.rm = TRUE) / sqrt(n()),
ci_upper = mean_delta + qt(0.975, df = n() - 1) * sd(delta, na.rm = TRUE) / sqrt(n()),
metric = "Fleshy macroalgal height",
.groups = "drop"
)
ggplot(fma_delta,
aes(x = mean_delta, y = metric)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_errorbarh(
aes(xmin = ci_lower, xmax = ci_upper),
height = 0.2
) +
geom_point(size = 3) +
facet_wrap(~ site_cat) +
labs(
x = "Mean change (Δ)",
y = NULL
) +
theme_bw() +
theme(
strip.background = element_blank(),
strip.text = element_text(face = "bold"),
panel.grid.major.y = element_blank()
)Data overview:
# metadata for species - family matching
coral_meta <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx"), sheet = "Metadata") %>%
clean_names() %>%
fill(scaled_coral_cover, column_header)
coral_meta_fam <- coral_meta %>%
filter(str_starts(column_header, "t") | column_header == "UNKN") %>%
select(family_code = column_header, family = definition) %>%
mutate(family = gsub("\\s*\\([^\\)]+\\)", "", family)) %>% # removing fluff
distinct() # removing UNKN duplicate
coral_meta_spp <- coral_meta %>%
filter(str_starts(scaled_coral_cover, "t") | scaled_coral_cover == "UNKN") %>%
select(species_code = column_header, family_code = scaled_coral_cover, species = definition) %>%
left_join(coral_meta_fam, by = "family_code") %>%
mutate(genus = sub(" .*", "", species),
family_code = str_remove(family_code, "^t"),
across(c(species, family), ~ str_replace(., "Unknown sp. of Live Coral", "Unknown spp.")),
family = if_else(family == "Scleractinia incertae sedis", "Unknown spp.", family))
# historical data
coral_spp_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralCoverSpeciesScaled", "CoralCoverSpeciesScaledBySite.xlsx"), sheet = "Data") %>%
rename_with(fix_names) %>%
mutate(year = year(date)) %>%
filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
pivot_longer(
cols = matches("^[a-z]{4}_(avg|std)$"), # all columns like acer_ave, apal_std, etc.
names_to = c("species_code", ".value"), # split into species and measurement type
names_sep = "_"
) %>%
mutate(across(c(avg, std), ~ .x / 100)) %>% # to match 2025 format (fraction of 1)
select(site, year, species_code, avg, std)
# 2025 data
file_coral_nemma <- here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")
file_coral_npa <- here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")
clean_excel_coral <- function(file_path) {
sheets <- excel_sheets(file_path)
sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall")) # exclude
sheets_list <- map(sheets_to_join, ~ read_excel(file_path, sheet = .x) %>%
rename_with(~ gsub("^(t)([A-Za-z]{4})(avg|med|std)$", "\\1_\\2_\\3", .x, ignore.case = TRUE)) %>%
rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x, ignore.case = TRUE)) %>%
clean_names())
reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))
}
coral_spp_2025 <- clean_excel_coral(file_coral_npa) %>%
bind_rows(clean_excel_coral(file_coral_nemma)) %>%
mutate(year = 2025) %>%
rename(site = name) %>%
pivot_longer(
cols = matches("^[a-z]{4}_(avg|std)$"), # all columns like acer_ave, apal_std, etc.
names_to = c("species_code", ".value"), # split into species and measurement type
names_sep = "_"
) %>%
select(site, year, species_code, avg, std)
# merge for complete df
coral_spp_site <- bind_rows(coral_spp_hist, coral_spp_2025) %>%
mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA")),
site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site)) %>%
mutate(species_code = toupper(species_code),
site = if_else(site == "Little Bird Patch", "Little Bird Backreef", site)) %>%
left_join(coral_meta_spp, by = "species_code") %>%
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
ungroup()
coral_fam_site <- coral_spp_site %>%
group_by(year, site_cat, site, family) %>%
summarize(avg = sum(avg, na.rm = TRUE)) %>%
ungroup()
# aggregate to year level
coral_fam_year <- coral_fam_site %>%
group_by(year, site_cat, site) %>%
complete( # adding 0s for when nothing was present
family = unique(coral_spp_site$family),
fill = list(avg = 0)
) %>%
ungroup() %>%
group_by(year, site_cat, family) %>%
summarize(mean = mean(avg, na.rm = TRUE),
sd = sd(avg, na.rm = TRUE)) %>%
ungroup() %>%
group_by(year, site_cat) %>%
mutate(rel_mean = mean / sum(mean, na.rm = TRUE)) %>%
ungroup()
chk <- coral_fam_year %>%
group_by(year, site_cat) %>%
summarize(tot = sum(rel_mean, na.rm = TRUE))
coral_fam_year_wide <- coral_fam_year %>%
pivot_wider(names_from = family,
values_from = c(mean, rel_mean, sd),
names_glue = "{family}_{.value}")coral_fams <- c("Acroporidae", "Merulinidae", "Milleporidae", "Montastraeidae", "Poritidae", "Siderastreidae", "Unknown spp.")
ggplot(coral_fam_site %>%
filter(family %in% coral_fams),
aes(x = year, y = avg, group = site)) +
geom_point() +
geom_line() +
scale_y_continuous(labels = percent_format()) +
labs(x = "Year", y = "Percent cover") +
facet_grid(site_cat ~ family) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))# no longer have rel_mean by site
# ggplot(coral_fam_site %>%
# filter(family %in% coral_fams),
# aes(x = year, y = rel_mean, group = site)) +
# geom_point() +
# geom_line() +
# scale_y_continuous(labels = percent_format()) +
# labs(x = "Year", y = "Relative percent cover of live coral") +
# facet_grid(site_cat ~ family) +
# theme_bw() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1))ggplot(coral_fam_year, aes(x = year, y = mean, group = site)) +
geom_point() +
geom_line() +
scale_y_continuous(labels = percent_format()) +
labs(x = "Year", y = "Mean percent cover", color = "Location") +
facet_grid(site_cat ~ family) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(coral_fam_year, aes(x = year, y = rel_mean, group = site_cat, color = site_cat)) +
geom_point() +
geom_line() +
scale_y_continuous(labels = percent_format()) +
labs(x = "Year", y = "Relative proportion", color = "Location") +
facet_wrap(~ family) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))2025 only
ggplot(coral_fam_year %>% filter(year == 2025), aes(x = family, y = mean)) +
geom_col() +
facet_wrap(. ~ site_cat) +
geom_errorbar(
aes(ymin = mean, ymax = mean + sd),
width = 0.2 # width of the error bars
) +
labs(x = NULL, y = "Mean Percent Cover") +
coord_flip() +
theme_bw()max_pc <- coral_fam_year_wide %>%
filter(site_cat == "NEMMA") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains("Unknown spp.") &
where(~ any(. >= 0.001))) %>%
unlist() %>%
max(na.rm = TRUE)
ncol_pc <- ncol(coral_fam_year_wide %>%
filter(site_cat == "NEMMA") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains("Unknown spp.") &
where(~ any(. >= 0.001)))
)
coral_rdr <-
rbind(rep(max_pc, ncol_pc),
rep(0, ncol_pc),
coral_fam_year_wide %>%
filter(site_cat == "NEMMA") %>%
column_to_rownames(var = "year") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains("Unknown spp.") &
where(~ any(. >= 0.001))) %>%
rename_with(~ gsub("_mean", "", .x))
)
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)
png("/Users/margaretwilson/Github/emc/agrra_monitoring/figs/coral_radar_NEMMA.png", width = 2000, height = 2000, res = 300)
# --- draw radar chart ---
radarchart(
coral_rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey"
)
# --- add legend ---
legend(
x = "topright",
legend = rownames(coral_rdr)[3:4],
col = line_colors,
lty = 1,
lwd = 2,
bty = "n",
cex = 0.8,
pt.cex = 1.5,
fill = fill_colors
)
dev.off()## quartz_off_screen
## 2
# radarchart(coral_rdr,
# pcol = line_colors,
# pfcol = fill_colors,
# cglcol = "grey")
#
# # doesn't like adding legend within rmarkdown - but can try within knit document
# legend(
# x = "topright",
# legend = rownames(coral_rdr)[3:4], # skip max/min rows
# col = line_colors,
# lty = 1,
# lwd = 2,
# bty = "n", # no box
# cex = 0.8,
# pt.cex = 1.5,
# fill = fill_colors # show fill colors in legend
# )max_pc <- coral_fam_year_wide %>%
filter(site_cat == "NDNP") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains("Unknown spp.") &
where(~ any(. >= 0.001))) %>%
unlist() %>%
max(na.rm = TRUE)
ncol_pc <- ncol(coral_fam_year_wide %>%
filter(site_cat == "NDNP") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains("Unknown spp.") &
where(~ any(. >= 0.001)))
)
coral_rdr <-
rbind(rep(max_pc, ncol_pc),
rep(0, ncol_pc),
coral_fam_year_wide %>%
filter(site_cat == "NDNP") %>%
column_to_rownames(var = "year") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains("Unknown spp.") &
where(~ any(. >= 0.001))) %>%
rename_with(~ gsub("_mean", "", .x))
)
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)
radarchart(coral_rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey")
# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
x = "topright",
legend = rownames(coral_rdr)[3:4], # skip max/min rows
col = line_colors,
lty = 1,
lwd = 2,
bty = "n", # no box
cex = 0.8,
pt.cex = 1.5,
fill = fill_colors # show fill colors in legend
)Data overview: - Historical: CoralRecruitsBySite.xlsx - family level - remove “DICH” code (not in 2025) - 2025: BenthicCoralRecruis.xlxs - species level - missing site-level standard deviation in data
# using coral_meta_spp key generated in prev. section
# historical data
recruits_hist_sized <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralRecruits", "CoralRecruitsBySite.xlsx"), sheet = "Small") %>%
mutate (size = "small") %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralRecruits", "CoralRecruitsBySite.xlsx"), sheet = "Large") %>%
mutate (size = "large")) %>%
rename_with(fix_names) %>%
mutate(year = year(date)) %>%
filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
pivot_longer(
cols = matches(".*_(avg|std)$"), # all columns ending in avg or std
names_to = c("family_code", ".value"), # split into species and measurement type
names_sep = "_"
) %>%
select(site, year, family_code, size, avg)
# adding totals because above is only small vs. large
recruits_hist_tot <- recruits_hist_sized %>%
group_by(site, year, family_code) %>%
summarize(
avg = sum(avg, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(size = "total") %>%
select(site, year, family_code, size, avg)
recruits_hist <- bind_rows(recruits_hist_tot, recruits_hist_sized)
# 2025 data - switch to "Overall" sheet #########
recruits_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralRecruits.xlsx"), sheet = "Overall") %>%
bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralRecruits.xlsx"), sheet = "Overall")) %>%
rename_with(fix_names) %>%
mutate(year = 2025) %>%
rename(site = name) %>%
select(year, site, matches("_(small|large|total)$")) %>%
pivot_longer(
cols = matches("_(small|large|total)$"),
names_to = c("family_code", "size"),
names_pattern = "^(.*?)_(small|large|total)$",
values_to = "avg"
) %>%
select(site, year, family_code, size, avg) %>%
mutate(family_code = str_remove(family_code, "^t_"))
# no SD for 2025
# merge for complete df
recruits_fam_site <- bind_rows(recruits_hist, recruits_2025) %>%
mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA")),
site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site),
site = if_else(site == "Little Bird Patch", "Little Bird Backreef", site),
family_code = toupper(family_code)) %>%
left_join(coral_meta_spp %>%
select(family_code, family) %>%
distinct(),
by = "family_code") %>%
mutate(family = if_else(family_code == "ALL", "All species",
if_else(family_code == "CARB", "Unknown spp.", family))) %>%
filter(family_code != "DICH") %>% # in historical data only, none present
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
ungroup()
recruits_fam_year <- recruits_fam_site %>%
group_by(year, site_cat, family_code, family, size) %>%
summarize(mean = mean(avg), # 0s already included for absent families
sd = sd(avg)) %>%
group_by(year, site_cat) %>%
mutate(rel_mean = mean / sum(mean)) %>%
group_by(year, site_cat, family_code, family, size) %>%
filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
ungroup()ggplot(recruits_fam_year %>% filter(year == 2025 & size == "total" & family != "All species"), aes(x = family, y = mean)) +
geom_col() +
facet_wrap(. ~ site_cat) +
geom_errorbar(
aes(ymin = mean, ymax = mean + sd),
width = 0.2 # width of the error bars
) +
labs(x = NULL, y = "Mean density") +
coord_flip() +
theme_bw()ggplot(recruits_fam_site %>%
filter(family_code %in% c("ALL") & size == "total"),
aes(x = year, y = avg, color = site)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Recruit density (indv/m2)", color = "Location") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))ggplot(recruits_fam_site %>%
filter(family_code != "ALL" & size == "total"),
aes(x = year, y = avg, color = site)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Recruit density (indv/m2)", color = "Location") +
facet_wrap(~ family) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))ggplot(recruits_fam_year %>% filter(family_code == "ALL" & size != "total"),
aes(x = factor(year), y = mean, group = site_cat)) +
geom_point() +
geom_line() +
geom_errorbar(
aes(
ymin = mean - sd,
ymax = mean + sd
),
width = 0.2,
alpha = 0.6
) +
facet_grid(size ~ site_cat) +
labs(y = "Mean density (indv. m2)", x = NULL) +
theme_bw()recruits_tot_site <- recruits_fam_site %>%
filter(family_code == "ALL" & size == "total")
p1 <- make_bp(recruits_tot_site, "NEMMA",
yvar = "avg", ylab = expression("Coral recruits (indv. m"^{-2}*")"),
y_max = 16,
title = TRUE) +
scale_y_continuous()
p2 <- make_bp(recruits_tot_site, "NDNP",
yvar = "avg", ylab = expression("Coral recruits (indv. m"^{-2}*")"),
y_max = 16,
title = TRUE,
remove_y_text = FALSE) +
scale_y_continuous()
final_plot <- (p1 | p2)
final_plot
ggsave(here("agrra_monitoring", "figs", "recruits.png"), plot = final_plot, width = 6, height = 4)Data overview: - 2025: MotileInverts.xlsx - 2017-2019: MotileInvertsBySite.xlsx
diadema_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "MotileInverts", "MotileInvertsBySite.xlsx"), sheet = "Data") %>%
rename_with(fix_names) %>%
mutate(year = year(date)) %>%
select(site, year, matches("^dt_|^da_|^dj_"))
diadema_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "MotileInverts.xlsx"), sheet = "Overall") %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "MotileInverts.xlsx"), sheet = "Overall")) %>%
rename_with(fix_names) %>%
mutate(year = 2025,
name = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
select(site = name, year, matches("^dt_|^da_|^dj_"), -ends_with("_med"))
diadema <- rbind(diadema_hist, diadema_2025) %>%
filter(year != 2022) %>% # incomplete year for NPA
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA sites that have site numbers with different name variations afterwards
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
ungroup()
diadema_site <- diadema %>%
select(site_cat, site, year, dt_avg, da_avg, dj_avg) %>% # keep density columns
pivot_longer(
cols = c(dt_avg, da_avg, dj_avg),
names_to = "stage",
values_to = "density"
) %>%
mutate(
stage = recode(stage,
dt_avg = "Total",
da_avg = "Adult",
dj_avg = "Juvenile"))
diadema_year <- diadema %>%
select(site_cat, year, dt_avg, da_avg, dj_avg) %>% # keep density columns
pivot_longer(
cols = c(dt_avg, da_avg, dj_avg),
names_to = "stage",
values_to = "density"
) %>%
mutate(
stage = recode(stage,
dt_avg = "Total",
da_avg = "Adult",
dj_avg = "Juvenile")) %>%
group_by(site_cat, year, stage) %>%
summarise(
mean_density = mean(density, na.rm = TRUE),
sd_density = sd(density, na.rm = TRUE),
.groups = "drop"
)ggplot(diadema_year, aes(x = factor(year), y = mean_density, fill = stage)) +
geom_col(position = "dodge") +
geom_errorbar(aes(ymin = mean_density - sd_density, ymax = mean_density + sd_density),
position = position_dodge(width = 0.9), width = 0.2) +
facet_wrap(~site_cat) +
labs(x = "Year", y = "Urchin Density (mean ± SD)", fill = "Stage") +
theme_minimal(base_size = 14)ggplot(diadema_site %>% filter(stage %in% c("Adult", "Juvenile")), aes(x = factor(year), y = density, group = site)) +
geom_line() +
geom_point(alpha = .5) +
facet_grid(stage ~ site_cat) +
labs(x = "Year", y = "Diadema density", fill = "Stage") +
theme_bw(base_size = 14)Data overview:
Notes + questions:
Data overview:
# create compatible family key by merging metadata
fish_fam_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Key") %>%
clean_names() %>%
slice(21:40) %>%
select(code_com = 1,
definition = 2) %>%
mutate(family = str_remove(word(definition, 1), ":"),
family = if_else(family == "Wrasse", "Wrasses",
if_else(code_com == "PUFF", "Pufferfishes", family))) %>%
select(-definition)
fish_fam_2025_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Metadata") %>%
clean_names() %>%
slice(23:42) %>%
select(code_lat = 2,
definition = 3) %>%
mutate(family = str_remove(word(definition, 1), ":")) %>%
select(-definition)
fish_fam_key <- full_join(fish_fam_hist_temp, fish_fam_2025_temp, by = "family") %>%
mutate(code_lat = tolower(gsub("^t", "", code_lat)),
code_com = tolower(code_com)) %>%
select(code_com, code_lat, family) %>%
bind_rows(
tibble(code_com = "t",
code_lat = "all",
family = "Total"))
# import data
fish_bm_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Data") %>%
rename_with(fix_names) %>%
mutate(year = year(date)) %>%
filter(nt >= 8) %>% # keeping only sites with 8 or more transects
select(site, year, contains("avg"), contains("std")) %>%
pivot_longer(
cols = -c(site, year),
names_to = "family_stat",
values_to = "value"
) %>%
separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
left_join(fish_fam_key, by = "code_com") %>%
select(site, year, code = code_lat, family, stat, value)
fish_bm_2025_temp <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall"),
read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall")) %>%
# deal with weird capitalization in species col names
rename_with(fix_names) %>%
filter(nt >= 8) %>% # keeping only sites with 8 or more transects
mutate(year = 2025,
site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
select(site, year, contains("avg"), contains("std")) %>%
rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
pivot_longer(
cols = -c(site, year),
names_to = "family_stat",
values_to = "value"
) %>%
separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
left_join(fish_fam_key, by = "code_lat") %>%
select(site, year, code = code_lat, family, stat, value)
# merge datasets
fish_bm_site_temp <- rbind(fish_bm_hist_temp, fish_bm_2025_temp) %>%
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA site inconsistencies
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
filter(year != 2022,
!is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
ungroup()
herb_bm_site_temp <- fish_bm_site_temp %>%
filter(code %in% c("scar", "acan")) %>%
group_by(site, site_cat, year, stat) %>%
summarise(
value = if_else(stat == "avg", sum(value, na.rm = TRUE),
sqrt(sum(value^2, na.rm = TRUE))), # for std
.groups = "drop"
) %>%
mutate(family = "Herbivores", code = "herb") %>%
distinct()
fish_bm_site <- rbind(fish_bm_site_temp, herb_bm_site_temp) %>%
pivot_wider(
names_from = stat, # use 'avg' and 'std' as new column names
values_from = value # fill those columns with the numeric values
) %>%
unnest(c(avg, std)) # make numeric instead of list
# wide version in benthic ~ fish section
fish_bm_year <- fish_bm_site %>%
group_by(year, site_cat, family, code) %>%
summarize(mean = mean(avg),
std = sd(avg)) %>%
# don't actually need this complete() here, but including as precaution
group_by(year, site_cat) %>%
complete(
family = unique(fish_bm_site$family),
fill = list(mean = 0)) %>% # list(mean = 0, rel_mean = 0)
ungroup()
# wide format for graphing purposes
fish_bm_year_wide <- fish_bm_year %>%
select(-code) %>%
pivot_wider(names_from = family,
values_from = c(mean, std),
names_glue = "{family}_{.value}")
# clean environment and export
# rm(list = ls(pattern = "_temp$"))
write.csv(fish_bm_year, here("agrra_monitoring", "data_outputs", "fish_bm_year.csv"))ggplot(fish_bm_site %>% filter(code == "all"),
aes(x = year, y = avg, group = site, color = site)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Total fish biomass (g/100m2)", color = "Site", group = "Site") +
theme_minimal()ggplot(fish_bm_site %>% filter(code == "scar"),
aes(x = year, y = avg, group = site, color = site)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Scarid biomass (g/100m2)", color = "Site", group = "Site") +
theme_minimal()ggplot(fish_bm_site %>% filter(code == "acan"),
aes(x = year, y = avg, group = site, color = site)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Acanthurid biomass (g/100m2)", color = "Site", group = "Site") +
theme_minimal()ggplot(fish_bm_site %>% filter(code == "serr"),
aes(x = year, y = avg, group = site, color = site)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Serranid biomass (g/100m2)", color = "Site", group = "Site") +
theme_minimal()ggplot(fish_bm_year, aes(x = year, y = mean, group = site_cat, color = site_cat)) +
geom_point() +
geom_line() +
labs(x = "Year", y = "Biomass", color = "Location") +
facet_wrap(~ family) +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))data <- fish_bm_year_wide %>%
filter(site_cat == "NEMMA") %>%
column_to_rownames(var = "year") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains(c("Total", "Herbivores")) &
where(~ any(. >= 100))) %>%
rename_with(~ gsub("_mean", "", .x))
max <- data %>%
unlist() %>%
max(na.rm = TRUE)
ncol <- ncol(data)
rdr <-
rbind(rep(max, ncol),
rep(0, ncol),
data)
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)
radarchart(rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey")
# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
x = "topright",
legend = rownames(rdr)[3:4], # skip max/min rows
col = line_colors,
lty = 1,
lwd = 2,
bty = "n", # no box
cex = 0.8,
pt.cex = 1.5,
fill = fill_colors # show fill colors in legend
)data <- fish_bm_year_wide %>%
filter(site_cat == "NDNP") %>%
column_to_rownames(var = "year") %>%
select(contains("_mean") &
!contains("rel_mean") &
!contains(c("Total", "Herbivores")) &
where(~ any(. >= 100))) %>% # at least one mean bm value must be over 100
rename_with(~ gsub("_mean", "", .x))
max <- data %>%
unlist() %>%
max(na.rm = TRUE)
ncol <- ncol(data)
rdr <-
rbind(rep(max, ncol),
rep(0, ncol),
data)
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)
radarchart(rdr,
pcol = line_colors,
pfcol = fill_colors,
cglcol = "grey")
legend(
x = "topright",
legend = rownames(rdr)[3:4], # skip max/min rows
col = line_colors,
lty = 1,
lwd = 2,
bty = "n", # no box
cex = 0.8,
pt.cex = 1.5,
fill = fill_colors # show fill colors in legend
)# import data
fish_den_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishDensity", "FishDensityBySite.xlsx"), sheet = "Data") %>%
rename_with(fix_names) %>%
mutate(year = year(date)) %>%
filter(nt >= 8) %>% # keeping only sites with 8 or more transects
select(site, year, contains("avg"), contains("std")) %>%
pivot_longer(
cols = -c(site, year),
names_to = "family_stat",
values_to = "value"
) %>%
separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
left_join(fish_fam_key, by = "code_com") %>%
select(site, year, code = code_lat, family, stat, value)
fish_den_2025 <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishDensity.xlsx"), sheet = "Overall"),
read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishDensity.xlsx"), sheet = "Overall")) %>%
# deal with weird capitalization in species col names
rename_with(fix_names) %>%
filter(nt >= 8) %>% # keeping only sites with 8 or more transects
mutate(year = 2025,
site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
select(site, year, contains("avg"), contains("std")) %>%
rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
pivot_longer(
cols = -c(site, year),
names_to = "family_stat",
values_to = "value"
) %>%
separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
left_join(fish_fam_key, by = "code_lat") %>%
select(site, year, code = code_lat, family, stat, value)
# merge datasets
fish_den_site <- rbind(fish_den_hist, fish_den_2025) %>%
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA site inconsistencies
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
filter(year != 2022,
!is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
ungroup() %>%
pivot_wider(
names_from = stat, # use 'avg' and 'std' as new column names
values_from = value # fill those columns with the numeric values
) %>%
unnest(c(avg, std)) # make numeric instead of list# calculating paired changes per site, and 95% confidence intervals
# --- compute paired deltas per site, then summarize per family × site_cat ---
df_change <- fish_den_site %>%
filter(year %in% c(2017, 2019, 2025)) %>%
# pivot so each site has avg_2017, avg_2019, avg_2025 (NA where missing)
pivot_wider(
id_cols = c(site_cat, family, site),
names_from = year,
values_from = avg,
names_glue = "avg_{year}"
) %>%
# baseline = 2017 if present otherwise 2019
mutate(
baseline_value = coalesce(avg_2017, avg_2019)
) %>%
# keep only sites that have both a baseline and 2025
filter(!is.na(baseline_value), !is.na(avg_2025)) %>%
# compute paired delta per site
mutate(delta = avg_2025 - baseline_value) %>%
# summarize to family × site_cat
group_by(site_cat, family) %>%
summarise(
n_sites = n(),
mean_change = mean(delta, na.rm = TRUE),
sd_change = sd(delta, na.rm = TRUE),
se_change = sd_change / sqrt(n_sites),
ci_low = mean_change - 1.96 * se_change,
ci_high = mean_change + 1.96 * se_change,
.groups = "drop"
)
# quick diagnostic: show how many sites contributed per family × site_cat
print(df_change %>% arrange(site_cat, desc(n_sites)))## # A tibble: 42 × 8
## site_cat family n_sites mean_change sd_change se_change ci_low ci_high
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NDNP Angelfishes 14 -0.285 0.391 0.105 -0.490 -0.0804
## 2 NDNP Barracudas 14 0.00516 0.127 0.0340 -0.0614 0.0717
## 3 NDNP Boxfishes 14 -0.106 0.149 0.0397 -0.184 -0.0281
## 4 NDNP Butterflyfi… 14 -1.42 1.47 0.393 -2.19 -0.647
## 5 NDNP Chubs 14 -0.0643 0.214 0.0571 -0.176 0.0477
## 6 NDNP Damselfishes 14 -0.475 0.672 0.180 -0.828 -0.123
## 7 NDNP Filefishes 14 -0.348 0.392 0.105 -0.554 -0.143
## 8 NDNP Groupers 14 -6.64 2.85 0.762 -8.14 -5.15
## 9 NDNP Grunts 14 -3.97 5.65 1.51 -6.93 -1.02
## 10 NDNP Jacks 14 -0.151 0.777 0.208 -0.558 0.256
## # ℹ 32 more rows
# --- forest plot: one point per family per panel (site_cat) ---
p <- ggplot(df_change %>% group_by(site_cat) %>% mutate(family = fct_reorder(family, mean_change)),
aes(x = mean_change, y = family)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
geom_pointrange(aes(xmin = ci_low, xmax = ci_high), size = 0.2) +
facet_wrap(~ site_cat, scales = "free_y") +
theme_bw(base_size = 12) +
labs(
x = expression("Change in density (indv. 100m"^{-2}*")"),
y = "Family")
ggsave(plot = p, here("agrra_monitoring", "figs", "fish_den_forest.png"), width = 6, height = 4)Data overview:
Notes + questions:
# starting with scarids only as mock-up
scar_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "PARR") %>%
clean_names() %>%
filter(!is.na(site)) %>%
mutate(year = year(date),
across(
.cols = contains("percent_"),
.fns = ~ .x / 100)) %>%
select(site, year, transect = trans, nf, contains("percent_")) %>%
rename_with(~ str_remove_all(.x, "percent_")) %>%
rename("41_up" = "40") %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
clean_names() %>%
mutate(year = 2025,
survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
rowwise() %>%
mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
ungroup() %>%
select(!matches("^\\d+$"))) %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
clean_names() %>%
mutate(year = 2025) %>%
select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
rowwise() %>%
mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
ungroup() %>%
select(!matches("^\\d+$"))) %>%
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA sites that have site numbers with different name variations afterwards
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
group_by(site, year) %>%
filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
ungroup()
# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(scar_length_trans %>%
# group_by(year, site) %>%
# summarize(count = n()),
# fish_bm_trans %>%
# group_by(year, site) %>%
# summarize(count = n()),
# by = c("year", "site"), suffix = c("_length", "_bm")) %>%
# filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))
# transform data for summaries
scar_length_long <- scar_length_trans %>%
filter(nf != 0) %>%
pivot_longer(
cols = matches("^\\d+_?\\d?"),
names_to = "length_mid",
values_to = "percent"
) %>%
mutate(
# convert bin names like "0_5" -> 2.5
length_mid = str_extract(length_mid, "\\d+_?\\d*"),
length_mid = ifelse(str_detect(length_mid, "_"),
sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
as.numeric(length_mid))) %>%
group_by(year, site, transect) %>%
mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
ungroup() %>%
select(-percent)
scar_length_site <- scar_length_long %>%
group_by(year, site, site_cat, length_mid) %>%
summarise(mean_prop = mean(prop), .groups = "drop")
scar_length_year <- scar_length_site %>%
group_by(year, site_cat, length_mid) %>%
summarise(mean_prop = mean(mean_prop), .groups = "drop")ggplot(scar_length_year %>%
mutate(year = as.character(year)),
aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
geom_density(alpha = 0.3, adjust = 1) +
facet_wrap(~ site_cat, ncol = 2) +
labs(x = "Fish length (cm) - Scaridae", y = "Density") +
theme_minimal()# transform data for summaries
scar_length_long_binned <- scar_length_trans %>%
filter(nf != 0) %>%
pivot_longer(
cols = matches("^\\d+_?\\d?"),
names_to = "size_bin",
values_to = "percent"
) %>%
rename(nf_tot = nf) %>%
mutate(nf_bin = nf_tot * percent)
scar_counts <- scar_length_long_binned %>%
mutate(size_bin = case_when(
size_bin %in% c("21_30", "31_40", "41_up") ~ "21_up",
TRUE ~ size_bin)) %>%
group_by(year, site_cat, size_bin) %>%
summarise(count = sum(nf_bin), .groups = "drop")nemma_table <- scar_counts %>%
filter(site_cat == "NEMMA") %>%
select(year, size_bin, count) %>%
pivot_wider(names_from = year, values_from = count, values_fill = 0)
chisq_result_nemma <- chisq.test(as.matrix(nemma_table[,-1]))
chisq_result_nemma##
## Pearson's Chi-squared test
##
## data: as.matrix(nemma_table[, -1])
## X-squared = 81.011, df = 3, p-value < 2.2e-16
ndnp_table <- scar_counts %>%
filter(site_cat == "NDNP") %>%
select(year, size_bin, count) %>%
pivot_wider(names_from = year, values_from = count, values_fill = 0)
chisq_result_ndnp <- chisq.test(as.matrix(ndnp_table[,-1]))
chisq_result_ndnp##
## Pearson's Chi-squared test
##
## data: as.matrix(ndnp_table[, -1])
## X-squared = 15.239, df = 3, p-value = 0.001623
tab <- scar_counts %>%
filter(site_cat == "NEMMA") %>%
mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
arrange(size_bin) %>%
pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
select(-site_cat)
# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin
mosaicplot(mat,
color = TRUE, # color each year
main = paste("Fish size distribution by year —", "NEMMA"),
xlab = "Size Bin",
ylab = "Year",
las = 1) # rotate y-axis labelstab <- scar_counts %>%
filter(site_cat == "NDNP") %>%
mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
arrange(size_bin) %>%
pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
select(-site_cat)
# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin
mosaicplot(mat,
color = TRUE, # color each year
main = paste("Fish size distribution by year —", "NDNP"),
xlab = "Size Bin",
ylab = "Year",
las = 1) # rotate y-axis labelsCurrently only 2025 data, because appears that 2017/2019 have no phase data…?
# raw historical data
fish_tax_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
clean_names() %>%
unite(species, genus, species, sep = " ", remove = TRUE)
fish_raw_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-FishRaw.xlsx"), sheet = "Counts Raw") %>%
clean_names() %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
clean_names() %>%
rename(transect = id, date = surveyed) %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
clean_names() %>%
select(survey = id, site = name))) %>%
filter(!is.na(site)) %>%
mutate(year = year(date),
size_class = str_replace_all(size_class, "-", "_"),
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA")),
# convert bin names like "0_5" -> 2.5
length_mid = str_extract(size_class, "\\d+_?\\d*"),
length_mid = ifelse(str_detect(length_mid, "_"),
sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
as.numeric(length_mid))) %>%
left_join(fish_tax_hist, by = c("taxonomy" = "id")) %>%
select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species) # no terminal phase in this data?
# raw 2025 data
fish_tax_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
clean_names() %>%
unite(species, genus, species, sep = " ", remove = TRUE)
# could replace NA species with 'spp.' before uniting
chk <- fish_tax_hist %>% # make sure taxonomic lookups are consistent
full_join(fish_tax_2025, by = "id", suffix = c("_hist", "_2025")) %>%
mutate(match = species_hist == species_2025)
fish_raw_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
clean_names() %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
clean_names() %>%
select(transect = id, survey, date = surveyed) %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
clean_names() %>%
select(survey = id, site = name))) %>%
bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
clean_names() %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
clean_names() %>%
select(transect = id, survey, date = surveyed) %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
clean_names() %>%
select(survey = id, site = name)))
) %>%
clean_names() %>%
mutate(year = year(date),
size_class = str_replace_all(size_class, " - ", "_"),
size_class = str_replace_all(size_class, "cm", ""),
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA")),
# convert bin names like "0_5" -> 2.5
length_mid = str_extract(size_class, "\\d+_?\\d*"),
length_mid = ifelse(str_detect(length_mid, "_"),
sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
as.numeric(length_mid))) %>%
left_join(fish_tax_2025, by = c("taxonomy" = "id")) %>%
select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species, terminal_phase)
# merge all years
# fish_raw <- bind_rows(fish_raw_hist, fish_raw_2025)
# scarids only
scarids <- fish_raw_2025 %>%
filter(species %in% c("Sparisoma viride", "Sparisoma aurofrenatum", "Scarus vetula", "Scarus iseri")) %>%
mutate(sex = case_when(terminal_phase == "No" ~ "Female",
terminal_phase == "Yes" ~ "Male"),
size_class = factor(size_class, levels = c("0_5","6_10","11_20","21_30", "31_40", "41_50")))
ggplot(scarids, aes(x = length_mid, fill = sex)) +
geom_histogram(stat = "count", alpha = 0.8, position = position_dodge()) +
facet_wrap(vars(species), ncol = 4) +
geom_vline(data = filter(scarids, species == "Sparisoma viride"), aes(xintercept = 18, color = "Minimum length at maturity"), linetype = "dashed") +
geom_vline(data = filter(scarids, species == "Scarus vetula"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
geom_vline(data = filter(scarids, species == "Sparisoma aurofrenatum"), aes(xintercept = 15, color = "Minimum length at maturity"), linetype = "dashed") +
geom_vline(data = filter(scarids, species == "Scarus iseri"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
scale_color_manual(values = c("black")) +
scale_fill_manual(values=c("pink2", "turquoise3")) +
theme_bw() +
labs(x = "Fish length (cm)", y = "Number observed", fill="") +
theme(strip.text = element_text(face = "italic"),
legend.title = element_blank(),
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1))# starting with scarids only as mock-up
serr_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "GROU") %>%
clean_names() %>%
filter(!is.na(site)) %>%
mutate(year = year(date),
across(
.cols = contains("percent_"),
.fns = ~ .x / 100)) %>%
select(site, year, transect = trans, nf, contains("percent_")) %>%
rename_with(~ str_remove_all(.x, "percent_")) %>%
rename("41_up" = "40") %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
clean_names() %>%
mutate(year = 2025,
survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
rowwise() %>%
mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
ungroup() %>%
select(!matches("^\\d+$"))) %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
clean_names() %>%
mutate(year = 2025) %>%
select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
rowwise() %>%
mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
ungroup() %>%
select(!matches("^\\d+$"))) %>%
mutate(site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA sites that have site numbers with different name variations afterwards
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
group_by(site, year) %>%
filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
ungroup()
# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(serr_length_trans %>%
# group_by(year, site) %>%
# summarize(count = n()),
# fish_bm_trans %>%
# group_by(year, site) %>%
# summarize(count = n()),
# by = c("year", "site"), suffix = c("_length", "_bm")) %>%
# filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))
# transform data for summaries
serr_length_long <- serr_length_trans %>%
filter(nf != 0) %>%
pivot_longer(
cols = matches("^\\d+_?\\d?"),
names_to = "length_mid",
values_to = "percent"
) %>%
mutate(
# convert bin names like "0_5" -> 2.5
length_mid = str_extract(length_mid, "\\d+_?\\d*"),
length_mid = ifelse(str_detect(length_mid, "_"),
sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
as.numeric(length_mid))) %>%
group_by(year, site, transect) %>%
mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
ungroup() %>%
select(-percent)
serr_length_site <- serr_length_long %>%
group_by(year, site, site_cat, length_mid) %>%
summarise(mean_prop = mean(prop), .groups = "drop")
serr_length_year <- serr_length_site %>%
group_by(year, site_cat, length_mid) %>%
summarise(mean_prop = mean(mean_prop), .groups = "drop")ggplot(serr_length_year %>%
mutate(year = as.character(year)),
aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
geom_density(alpha = 0.3, adjust = 1) +
facet_wrap(~ site_cat, ncol = 2) +
labs(x = "Fish length (cm) - Serranidae", y = "Density") +
theme_minimal()No statistically significant relationships found (including when scarid and acanthurids are aggregated)
##
## Call:
## lm(formula = fma_mean ~ scar_mean, data = benth_fish_site)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23116 -0.11016 -0.01080 0.08115 0.38401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.083e-01 3.988e-02 7.730 3.71e-09 ***
## scar_mean -3.646e-05 2.748e-05 -1.327 0.193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1393 on 36 degrees of freedom
## Multiple R-squared: 0.04664, Adjusted R-squared: 0.02016
## F-statistic: 1.761 on 1 and 36 DF, p-value: 0.1928
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]
ggplot(benth_fish_site, aes(x = scar_mean, y = fma_mean)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
labs(x = "Scarid biomass", y = "FMA cover") +
annotate("text", x = Inf, y = Inf,
label = paste0("R² = ", round(r2, 2),
"\nP = ", signif(pval, 2)),
hjust = 1.1, vjust = 1.2, size = 4) +
theme_bw()##
## Call:
## lm(formula = fma_mean ~ herb_mean, data = benth_fish_site)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20618 -0.10757 -0.01420 0.07757 0.41048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.203e-01 4.543e-02 7.051 2.82e-08 ***
## herb_mean -2.266e-05 1.607e-05 -1.410 0.167
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1389 on 36 degrees of freedom
## Multiple R-squared: 0.05235, Adjusted R-squared: 0.02602
## F-statistic: 1.989 on 1 and 36 DF, p-value: 0.1671
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]
ggplot(benth_fish_site, aes(x = herb_mean, y = fma_mean)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
labs(x = "Herbivore biomass", y = "FMA cover") +
annotate("text", x = Inf, y = Inf,
label = paste0("R² = ", round(r2, 2),
"\nP = ", signif(pval, 2)),
hjust = 1.1, vjust = 1.2, size = 4) +
theme_bw()##
## Call:
## lm(formula = lc_mean ~ scar_mean, data = benth_fish_site)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05273 -0.02691 -0.01161 0.01497 0.15619
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.675e-02 1.209e-02 3.869 0.000442 ***
## scar_mean 6.717e-06 8.326e-06 0.807 0.425141
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04221 on 36 degrees of freedom
## Multiple R-squared: 0.01776, Adjusted R-squared: -0.009529
## F-statistic: 0.6508 on 1 and 36 DF, p-value: 0.4251
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]
ggplot(benth_fish_site, aes(x = scar_mean, y = lc_mean)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
labs(x = "Scarid biomass", y = "Live coral cover") +
annotate("text", x = Inf, y = Inf,
label = paste0("R² = ", round(r2, 2),
"\nP = ", signif(pval, 2)),
hjust = 1.1, vjust = 1.2, size = 4) +
theme_bw()##
## Call:
## lm(formula = lc_mean ~ herb_mean, data = benth_fish_site)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.045407 -0.028903 -0.009291 0.014008 0.156349
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.741e-02 1.392e-02 4.124 0.00021 ***
## herb_mean -1.069e-06 4.924e-06 -0.217 0.82931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04256 on 36 degrees of freedom
## Multiple R-squared: 0.001308, Adjusted R-squared: -0.02643
## F-statistic: 0.04716 on 1 and 36 DF, p-value: 0.8293
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]
ggplot(benth_fish_site, aes(x = herb_mean, y = lc_mean)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
labs(x = "Herbivore biomass", y = "Live coral cover") +
annotate("text", x = Inf, y = Inf,
label = paste0("R² = ", round(r2, 2),
"\nP = ", signif(pval, 2)),
hjust = 1.1, vjust = 1.2, size = 4) +
theme_bw()Testing correlation between scarid biomass and benthic variables - nothing is notably strong
benth_vars <- benth_fish_site %>%
select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean, cyan_mean, ainv_mean, peys_mean)
cor_results <- cor(benth_vars, benth_fish_site$scar_mean, use = "pairwise.complete.obs")
cor_results## [,1]
## lc_mean 0.13324977
## cca_mean 0.38089582
## ta_mean 0.21085969
## tas_mean -0.18002397
## fma_mean -0.21595995
## cyan_mean 0.25669404
## ainv_mean 0.36476851
## peys_mean -0.01637593
benth_mat <- benth_fish_site %>%
select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean)
adonis2(benth_mat ~ scar_mean + site_cat + year,
data = benth_fish_site,
method = "bray",
permutations = 999,
by = "term")## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = benth_mat ~ scar_mean + site_cat + year, data = benth_fish_site, permutations = 999, method = "bray", by = "term")
## Df SumOfSqs R2 F Pr(>F)
## scar_mean 1 0.15087 0.05318 5.3525 0.005 **
## site_cat 1 1.13209 0.39906 40.1633 0.001 ***
## year 1 0.59559 0.20994 21.1298 0.001 ***
## Residual 34 0.95837 0.33782
## Total 37 2.83693 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- adonis2(benth_mat ~ scar_mean + site_cat + year,
data = benth_fish_site,
method = "bray",
permutations = 999,
by = "term")
res_df <- broom::tidy(res)
ggplot(res_df, aes(x = reorder(term, R2), y = R2)) +
geom_col(fill = "skyblue") +
coord_flip() +
labs(x = "Predictor", y = "Proportion of variation explained (R²)") +
theme_bw()access_meta_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
clean_names() %>%
left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
clean_names() %>%
rename(survey = id)) %>%
select(transect = id, date = surveyed, site = name)
# checking percent that are unknown
chk <- coral_spp_hist %>%
mutate(category = if_else(species == "unkn", "unknown", "known")) %>%
group_by(site, year, category) %>%
summarize(cover = sum(avg))
# old biomass import code - holding for now
fish_bm_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassByTransect.xlsx"), sheet = "Data") %>%
clean_names() %>%
filter(!is.na(site)) %>%
mutate(year = year(date)) %>%
select(site, year, transect = trans, t_tot = t, t_scar = parr, t_acan = surg, t_serr = grou) %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>%
clean_names() %>%
mutate(year = year(surveyed),
survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>% # inconsistency in site name between 2017 and 2025
select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>%
clean_names() %>%
mutate(year = year(surveyed)) %>%
select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
mutate(t_herb = t_scar + t_acan,
site = if_else(str_detect(site, "^Site \\d+"),
str_extract(site, "^Site \\d+"),
site), # dealing with NPA sites that have site numbers with different name variations afterwards
site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
if_else(str_detect(site, "Redonda"), "Redonda",
"NEMMA"))) %>%
mutate(year = if_else(year == 2024, 2025, year)) %>% # Site 11 had one erroneous 2024 date
filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
group_by(site, year) %>%
filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
group_by(site) %>%
filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
ungroup()
fish_bm_site <- fish_bm_trans %>%
group_by(site, site_cat, year) %>%
rename_with(~ str_remove(.x, "t_")) %>%
summarise(across(c(tot, herb, scar, acan, serr),
list(mean = ~ mean(.x, na.rm = TRUE),
se = ~ se(.x)),
.names = "{.col}_{.fn}")) %>%
ungroup()
fish_bm_year <- fish_bm_site %>%
select(!contains("_se")) %>%
rename_with(~ str_remove(.x, "_mean")) %>%
group_by(site_cat, year) %>%
summarise(across(c(tot, herb, scar, acan, serr),
list(mean = ~ mean(.x, na.rm = TRUE),
se = ~ se(.x)),
.names = "{.col}_{.fn}")) %>%
ungroup()
# importing and merging coral spp sheets for 2025
sheets <- excel_sheets(file_coral)
sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall")) # exclude two sheets before merging remainder
sheets_list <- map(sheets_to_join, ~ read_excel(file_coral, sheet = .x) %>%
rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x)) %>%
rename_with(~ gsub("^(t)([A-Za-z]{4})(ave|med|std)$", "\\1_\\2_\\3", .x)) %>%
clean_names())
coral_spp_2025 <- reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))
# benthic ~ fish by year
benth_fish_year <- benth_fish_site %>%
select(-contains("_se")) %>%
rename_with(~ str_remove(.x, "_mean$")) %>%
group_by(year, site_cat) %>%
summarise(
across(
where(is.numeric),
list(mean = ~mean(.x, na.rm = TRUE),
se = ~se(.x)),
.names = "{.col}_{.fn}"
),
.groups = "drop"
)
# benthic NMDS NEMMA only
benthic_nmds <- benthic_cover_site %>%
filter(site_cat == "NEMMA") %>%
select(-site_cat,
-contains("_sd"),
-contains("cyan"),
-contains("peys")) %>%
rename_with(~ gsub("_mean", "", .x)) %>%
mutate(year = as.character(year))
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc <- benthic_nmds %>% ungroup %>% select(-site, -year)
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
as.data.frame() %>%
mutate(site = benthic_meta$site,
year = benthic_meta$year)
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
as.data.frame() %>%
rownames_to_column(var = "category")
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
mutate(NMDS1 = NMDS1 * arrow_mult,
NMDS2 = NMDS2 * arrow_mult)
pc_scores_scaled <- pc_scores_scaled %>%
mutate(
label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
)
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
geom_point(size = 4) +
scale_shape_manual(values = 0:25) +
stat_ellipse(aes(group = year), type = "t", linetype = 2) +
geom_segment(data = pc_scores_scaled,
aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
arrow = arrow(length = unit(0.3, "cm")),
color = "black",
inherit.aes = FALSE) +
geom_text(data = pc_scores_scaled,
aes(x = NMDS1 + label_nudge_x,
y = NMDS2 + label_nudge_y,
label = category),
color = "black",
inherit.aes = FALSE) +
theme_minimal()
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year,
data = benthic_meta,
method = "bray",
permutations = 999)
print(adonis_res)